{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "2d124d22-de73-436b-86cd-9b162b469be8",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Requirement already satisfied: pip in /opt/conda/lib/python3.11/site-packages (24.2)\n",
      "Note: you may need to restart the kernel to use updated packages.\n",
      "Found existing installation: langchain-core 0.3.6\n",
      "Uninstalling langchain-core-0.3.6:\n",
      "  Successfully uninstalled langchain-core-0.3.6\n",
      "Found existing installation: langchain-openai 0.2.1\n",
      "Uninstalling langchain-openai-0.2.1:\n",
      "  Successfully uninstalled langchain-openai-0.2.1\n",
      "Found existing installation: langchain-experimental 0.3.2\n",
      "Uninstalling langchain-experimental-0.3.2:\n",
      "  Successfully uninstalled langchain-experimental-0.3.2\n",
      "Found existing installation: beautifulsoup4 4.12.3\n",
      "Uninstalling beautifulsoup4-4.12.3:\n",
      "  Successfully uninstalled beautifulsoup4-4.12.3\n",
      "Found existing installation: langchain-community 0.3.1\n",
      "Uninstalling langchain-community-0.3.1:\n",
      "  Successfully uninstalled langchain-community-0.3.1\n",
      "Found existing installation: langchain 0.3.1\n",
      "Uninstalling langchain-0.3.1:\n",
      "  Successfully uninstalled langchain-0.3.1\n",
      "Found existing installation: chromadb 0.5.11\n",
      "Uninstalling chromadb-0.5.11:\n",
      "  Successfully uninstalled chromadb-0.5.11\n",
      "Note: you may need to restart the kernel to use updated packages.\n",
      "Collecting langchain-core==0.3.6\n",
      "  Using cached langchain_core-0.3.6-py3-none-any.whl.metadata (6.3 kB)\n",
      "Requirement already satisfied: PyYAML>=5.3 in /opt/conda/lib/python3.11/site-packages (from langchain-core==0.3.6) (6.0.1)\n",
      "Requirement already satisfied: jsonpatch<2.0,>=1.33 in /opt/conda/lib/python3.11/site-packages (from langchain-core==0.3.6) (1.33)\n",
      "Requirement already satisfied: langsmith<0.2.0,>=0.1.125 in /opt/conda/lib/python3.11/site-packages (from langchain-core==0.3.6) (0.1.129)\n",
      "Requirement already satisfied: packaging<25,>=23.2 in /opt/conda/lib/python3.11/site-packages (from langchain-core==0.3.6) (23.2)\n",
      "Requirement already satisfied: pydantic<3.0.0,>=2.5.2 in /opt/conda/lib/python3.11/site-packages (from langchain-core==0.3.6) (2.9.2)\n",
      "Requirement already satisfied: tenacity!=8.4.0,<9.0.0,>=8.1.0 in /home/jovyan/.local/lib/python3.11/site-packages (from langchain-core==0.3.6) (8.3.0)\n",
      "Requirement already satisfied: typing-extensions>=4.7 in /opt/conda/lib/python3.11/site-packages (from langchain-core==0.3.6) (4.12.2)\n",
      "Requirement already satisfied: jsonpointer>=1.9 in /opt/conda/lib/python3.11/site-packages (from jsonpatch<2.0,>=1.33->langchain-core==0.3.6) (2.4)\n",
      "Requirement already satisfied: httpx<1,>=0.23.0 in /home/jovyan/.local/lib/python3.11/site-packages (from langsmith<0.2.0,>=0.1.125->langchain-core==0.3.6) (0.27.0)\n",
      "Requirement already satisfied: orjson<4.0.0,>=3.9.14 in /home/jovyan/.local/lib/python3.11/site-packages (from langsmith<0.2.0,>=0.1.125->langchain-core==0.3.6) (3.10.3)\n",
      "Requirement already satisfied: requests<3,>=2 in /opt/conda/lib/python3.11/site-packages (from langsmith<0.2.0,>=0.1.125->langchain-core==0.3.6) (2.31.0)\n",
      "Requirement already satisfied: annotated-types>=0.6.0 in /home/jovyan/.local/lib/python3.11/site-packages (from pydantic<3.0.0,>=2.5.2->langchain-core==0.3.6) (0.6.0)\n",
      "Requirement already satisfied: pydantic-core==2.23.4 in /opt/conda/lib/python3.11/site-packages (from pydantic<3.0.0,>=2.5.2->langchain-core==0.3.6) (2.23.4)\n",
      "Requirement already satisfied: anyio in /home/jovyan/.local/lib/python3.11/site-packages (from httpx<1,>=0.23.0->langsmith<0.2.0,>=0.1.125->langchain-core==0.3.6) (3.7.1)\n",
      "Requirement already satisfied: certifi in /opt/conda/lib/python3.11/site-packages (from httpx<1,>=0.23.0->langsmith<0.2.0,>=0.1.125->langchain-core==0.3.6) (2024.8.30)\n",
      "Requirement already satisfied: httpcore==1.* in /home/jovyan/.local/lib/python3.11/site-packages (from httpx<1,>=0.23.0->langsmith<0.2.0,>=0.1.125->langchain-core==0.3.6) (1.0.5)\n",
      "Requirement already satisfied: idna in /opt/conda/lib/python3.11/site-packages (from httpx<1,>=0.23.0->langsmith<0.2.0,>=0.1.125->langchain-core==0.3.6) (3.4)\n",
      "Requirement already satisfied: sniffio in /opt/conda/lib/python3.11/site-packages (from httpx<1,>=0.23.0->langsmith<0.2.0,>=0.1.125->langchain-core==0.3.6) (1.3.0)\n",
      "Requirement already satisfied: h11<0.15,>=0.13 in /home/jovyan/.local/lib/python3.11/site-packages (from httpcore==1.*->httpx<1,>=0.23.0->langsmith<0.2.0,>=0.1.125->langchain-core==0.3.6) (0.14.0)\n",
      "Requirement already satisfied: charset-normalizer<4,>=2 in /opt/conda/lib/python3.11/site-packages (from requests<3,>=2->langsmith<0.2.0,>=0.1.125->langchain-core==0.3.6) (3.3.0)\n",
      "Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/conda/lib/python3.11/site-packages (from requests<3,>=2->langsmith<0.2.0,>=0.1.125->langchain-core==0.3.6) (2.0.7)\n",
      "Using cached langchain_core-0.3.6-py3-none-any.whl (399 kB)\n",
      "Installing collected packages: langchain-core\n",
      "Successfully installed langchain-core-0.3.6\n",
      "Note: you may need to restart the kernel to use updated packages.\n",
      "Collecting langchain-openai==0.2.1\n",
      "  Using cached langchain_openai-0.2.1-py3-none-any.whl.metadata (2.6 kB)\n",
      "Requirement already satisfied: langchain-core<0.4,>=0.3 in /opt/conda/lib/python3.11/site-packages (from langchain-openai==0.2.1) (0.3.6)\n",
      "Requirement already satisfied: openai<2.0.0,>=1.40.0 in /opt/conda/lib/python3.11/site-packages (from langchain-openai==0.2.1) (1.43.0)\n",
      "Requirement already satisfied: tiktoken<1,>=0.7 in /opt/conda/lib/python3.11/site-packages (from langchain-openai==0.2.1) (0.7.0)\n",
      "Requirement already satisfied: PyYAML>=5.3 in /opt/conda/lib/python3.11/site-packages (from langchain-core<0.4,>=0.3->langchain-openai==0.2.1) (6.0.1)\n",
      "Requirement already satisfied: jsonpatch<2.0,>=1.33 in /opt/conda/lib/python3.11/site-packages (from langchain-core<0.4,>=0.3->langchain-openai==0.2.1) (1.33)\n",
      "Requirement already satisfied: langsmith<0.2.0,>=0.1.125 in /opt/conda/lib/python3.11/site-packages (from langchain-core<0.4,>=0.3->langchain-openai==0.2.1) (0.1.129)\n",
      "Requirement already satisfied: packaging<25,>=23.2 in /opt/conda/lib/python3.11/site-packages (from langchain-core<0.4,>=0.3->langchain-openai==0.2.1) (23.2)\n",
      "Requirement already satisfied: pydantic<3.0.0,>=2.5.2 in /opt/conda/lib/python3.11/site-packages (from langchain-core<0.4,>=0.3->langchain-openai==0.2.1) (2.9.2)\n",
      "Requirement already satisfied: tenacity!=8.4.0,<9.0.0,>=8.1.0 in /home/jovyan/.local/lib/python3.11/site-packages (from langchain-core<0.4,>=0.3->langchain-openai==0.2.1) (8.3.0)\n",
      "Requirement already satisfied: typing-extensions>=4.7 in /opt/conda/lib/python3.11/site-packages (from langchain-core<0.4,>=0.3->langchain-openai==0.2.1) (4.12.2)\n",
      "Requirement already satisfied: anyio<5,>=3.5.0 in /home/jovyan/.local/lib/python3.11/site-packages (from openai<2.0.0,>=1.40.0->langchain-openai==0.2.1) (3.7.1)\n",
      "Requirement already satisfied: distro<2,>=1.7.0 in /home/jovyan/.local/lib/python3.11/site-packages (from openai<2.0.0,>=1.40.0->langchain-openai==0.2.1) (1.9.0)\n",
      "Requirement already satisfied: httpx<1,>=0.23.0 in /home/jovyan/.local/lib/python3.11/site-packages (from openai<2.0.0,>=1.40.0->langchain-openai==0.2.1) (0.27.0)\n",
      "Requirement already satisfied: jiter<1,>=0.4.0 in /opt/conda/lib/python3.11/site-packages (from openai<2.0.0,>=1.40.0->langchain-openai==0.2.1) (0.5.0)\n",
      "Requirement already satisfied: sniffio in /opt/conda/lib/python3.11/site-packages (from openai<2.0.0,>=1.40.0->langchain-openai==0.2.1) (1.3.0)\n",
      "Requirement already satisfied: tqdm>4 in /opt/conda/lib/python3.11/site-packages (from openai<2.0.0,>=1.40.0->langchain-openai==0.2.1) (4.66.1)\n",
      "Requirement already satisfied: regex>=2022.1.18 in /opt/conda/lib/python3.11/site-packages (from tiktoken<1,>=0.7->langchain-openai==0.2.1) (2024.7.24)\n",
      "Requirement already satisfied: requests>=2.26.0 in /opt/conda/lib/python3.11/site-packages (from tiktoken<1,>=0.7->langchain-openai==0.2.1) (2.31.0)\n",
      "Requirement already satisfied: idna>=2.8 in /opt/conda/lib/python3.11/site-packages (from anyio<5,>=3.5.0->openai<2.0.0,>=1.40.0->langchain-openai==0.2.1) (3.4)\n",
      "Requirement already satisfied: certifi in /opt/conda/lib/python3.11/site-packages (from httpx<1,>=0.23.0->openai<2.0.0,>=1.40.0->langchain-openai==0.2.1) (2024.8.30)\n",
      "Requirement already satisfied: httpcore==1.* in /home/jovyan/.local/lib/python3.11/site-packages (from httpx<1,>=0.23.0->openai<2.0.0,>=1.40.0->langchain-openai==0.2.1) (1.0.5)\n",
      "Requirement already satisfied: h11<0.15,>=0.13 in /home/jovyan/.local/lib/python3.11/site-packages (from httpcore==1.*->httpx<1,>=0.23.0->openai<2.0.0,>=1.40.0->langchain-openai==0.2.1) (0.14.0)\n",
      "Requirement already satisfied: jsonpointer>=1.9 in /opt/conda/lib/python3.11/site-packages (from jsonpatch<2.0,>=1.33->langchain-core<0.4,>=0.3->langchain-openai==0.2.1) (2.4)\n",
      "Requirement already satisfied: orjson<4.0.0,>=3.9.14 in /home/jovyan/.local/lib/python3.11/site-packages (from langsmith<0.2.0,>=0.1.125->langchain-core<0.4,>=0.3->langchain-openai==0.2.1) (3.10.3)\n",
      "Requirement already satisfied: annotated-types>=0.6.0 in /home/jovyan/.local/lib/python3.11/site-packages (from pydantic<3.0.0,>=2.5.2->langchain-core<0.4,>=0.3->langchain-openai==0.2.1) (0.6.0)\n",
      "Requirement already satisfied: pydantic-core==2.23.4 in /opt/conda/lib/python3.11/site-packages (from pydantic<3.0.0,>=2.5.2->langchain-core<0.4,>=0.3->langchain-openai==0.2.1) (2.23.4)\n",
      "Requirement already satisfied: charset-normalizer<4,>=2 in /opt/conda/lib/python3.11/site-packages (from requests>=2.26.0->tiktoken<1,>=0.7->langchain-openai==0.2.1) (3.3.0)\n",
      "Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/conda/lib/python3.11/site-packages (from requests>=2.26.0->tiktoken<1,>=0.7->langchain-openai==0.2.1) (2.0.7)\n",
      "Using cached langchain_openai-0.2.1-py3-none-any.whl (49 kB)\n",
      "Installing collected packages: langchain-openai\n",
      "Successfully installed langchain-openai-0.2.1\n",
      "Note: you may need to restart the kernel to use updated packages.\n",
      "Collecting langchain-experimental==0.3.2\n",
      "  Using cached langchain_experimental-0.3.2-py3-none-any.whl.metadata (1.7 kB)\n",
      "Collecting langchain-community<0.4.0,>=0.3.0 (from langchain-experimental==0.3.2)\n",
      "  Using cached langchain_community-0.3.1-py3-none-any.whl.metadata (2.8 kB)\n",
      "Requirement already satisfied: langchain-core<0.4.0,>=0.3.6 in /opt/conda/lib/python3.11/site-packages (from langchain-experimental==0.3.2) (0.3.6)\n",
      "Requirement already satisfied: PyYAML>=5.3 in /opt/conda/lib/python3.11/site-packages (from langchain-community<0.4.0,>=0.3.0->langchain-experimental==0.3.2) (6.0.1)\n",
      "Requirement already satisfied: SQLAlchemy<3,>=1.4 in /opt/conda/lib/python3.11/site-packages (from langchain-community<0.4.0,>=0.3.0->langchain-experimental==0.3.2) (2.0.22)\n",
      "Requirement already satisfied: aiohttp<4.0.0,>=3.8.3 in /home/jovyan/.local/lib/python3.11/site-packages (from langchain-community<0.4.0,>=0.3.0->langchain-experimental==0.3.2) (3.9.5)\n",
      "Requirement already satisfied: dataclasses-json<0.7,>=0.5.7 in /home/jovyan/.local/lib/python3.11/site-packages (from langchain-community<0.4.0,>=0.3.0->langchain-experimental==0.3.2) (0.6.6)\n",
      "Collecting langchain<0.4.0,>=0.3.1 (from langchain-community<0.4.0,>=0.3.0->langchain-experimental==0.3.2)\n",
      "  Using cached langchain-0.3.1-py3-none-any.whl.metadata (7.1 kB)\n",
      "Requirement already satisfied: langsmith<0.2.0,>=0.1.125 in /opt/conda/lib/python3.11/site-packages (from langchain-community<0.4.0,>=0.3.0->langchain-experimental==0.3.2) (0.1.129)\n",
      "Requirement already satisfied: numpy<2,>=1 in /home/jovyan/.local/lib/python3.11/site-packages (from langchain-community<0.4.0,>=0.3.0->langchain-experimental==0.3.2) (1.26.4)\n",
      "Requirement already satisfied: pydantic-settings<3.0.0,>=2.4.0 in /opt/conda/lib/python3.11/site-packages (from langchain-community<0.4.0,>=0.3.0->langchain-experimental==0.3.2) (2.5.2)\n",
      "Requirement already satisfied: requests<3,>=2 in /opt/conda/lib/python3.11/site-packages (from langchain-community<0.4.0,>=0.3.0->langchain-experimental==0.3.2) (2.31.0)\n",
      "Requirement already satisfied: tenacity!=8.4.0,<9.0.0,>=8.1.0 in /home/jovyan/.local/lib/python3.11/site-packages (from langchain-community<0.4.0,>=0.3.0->langchain-experimental==0.3.2) (8.3.0)\n",
      "Requirement already satisfied: jsonpatch<2.0,>=1.33 in /opt/conda/lib/python3.11/site-packages (from langchain-core<0.4.0,>=0.3.6->langchain-experimental==0.3.2) (1.33)\n",
      "Requirement already satisfied: packaging<25,>=23.2 in /opt/conda/lib/python3.11/site-packages (from langchain-core<0.4.0,>=0.3.6->langchain-experimental==0.3.2) (23.2)\n",
      "Requirement already satisfied: pydantic<3.0.0,>=2.5.2 in /opt/conda/lib/python3.11/site-packages (from langchain-core<0.4.0,>=0.3.6->langchain-experimental==0.3.2) (2.9.2)\n",
      "Requirement already satisfied: typing-extensions>=4.7 in /opt/conda/lib/python3.11/site-packages (from langchain-core<0.4.0,>=0.3.6->langchain-experimental==0.3.2) (4.12.2)\n",
      "Requirement already satisfied: aiosignal>=1.1.2 in /home/jovyan/.local/lib/python3.11/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community<0.4.0,>=0.3.0->langchain-experimental==0.3.2) (1.3.1)\n",
      "Requirement already satisfied: attrs>=17.3.0 in /opt/conda/lib/python3.11/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community<0.4.0,>=0.3.0->langchain-experimental==0.3.2) (23.1.0)\n",
      "Requirement already satisfied: frozenlist>=1.1.1 in /home/jovyan/.local/lib/python3.11/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community<0.4.0,>=0.3.0->langchain-experimental==0.3.2) (1.4.1)\n",
      "Requirement already satisfied: multidict<7.0,>=4.5 in /home/jovyan/.local/lib/python3.11/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community<0.4.0,>=0.3.0->langchain-experimental==0.3.2) (6.0.5)\n",
      "Requirement already satisfied: yarl<2.0,>=1.0 in /home/jovyan/.local/lib/python3.11/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community<0.4.0,>=0.3.0->langchain-experimental==0.3.2) (1.9.4)\n",
      "Requirement already satisfied: marshmallow<4.0.0,>=3.18.0 in /home/jovyan/.local/lib/python3.11/site-packages (from dataclasses-json<0.7,>=0.5.7->langchain-community<0.4.0,>=0.3.0->langchain-experimental==0.3.2) (3.21.2)\n",
      "Requirement already satisfied: typing-inspect<1,>=0.4.0 in /home/jovyan/.local/lib/python3.11/site-packages (from dataclasses-json<0.7,>=0.5.7->langchain-community<0.4.0,>=0.3.0->langchain-experimental==0.3.2) (0.9.0)\n",
      "Requirement already satisfied: jsonpointer>=1.9 in /opt/conda/lib/python3.11/site-packages (from jsonpatch<2.0,>=1.33->langchain-core<0.4.0,>=0.3.6->langchain-experimental==0.3.2) (2.4)\n",
      "Requirement already satisfied: langchain-text-splitters<0.4.0,>=0.3.0 in /opt/conda/lib/python3.11/site-packages (from langchain<0.4.0,>=0.3.1->langchain-community<0.4.0,>=0.3.0->langchain-experimental==0.3.2) (0.3.0)\n",
      "Requirement already satisfied: httpx<1,>=0.23.0 in /home/jovyan/.local/lib/python3.11/site-packages (from langsmith<0.2.0,>=0.1.125->langchain-community<0.4.0,>=0.3.0->langchain-experimental==0.3.2) (0.27.0)\n",
      "Requirement already satisfied: orjson<4.0.0,>=3.9.14 in /home/jovyan/.local/lib/python3.11/site-packages (from langsmith<0.2.0,>=0.1.125->langchain-community<0.4.0,>=0.3.0->langchain-experimental==0.3.2) (3.10.3)\n",
      "Requirement already satisfied: annotated-types>=0.6.0 in /home/jovyan/.local/lib/python3.11/site-packages (from pydantic<3.0.0,>=2.5.2->langchain-core<0.4.0,>=0.3.6->langchain-experimental==0.3.2) (0.6.0)\n",
      "Requirement already satisfied: pydantic-core==2.23.4 in /opt/conda/lib/python3.11/site-packages (from pydantic<3.0.0,>=2.5.2->langchain-core<0.4.0,>=0.3.6->langchain-experimental==0.3.2) (2.23.4)\n",
      "Requirement already satisfied: python-dotenv>=0.21.0 in /home/jovyan/.local/lib/python3.11/site-packages (from pydantic-settings<3.0.0,>=2.4.0->langchain-community<0.4.0,>=0.3.0->langchain-experimental==0.3.2) (1.0.1)\n",
      "Requirement already satisfied: charset-normalizer<4,>=2 in /opt/conda/lib/python3.11/site-packages (from requests<3,>=2->langchain-community<0.4.0,>=0.3.0->langchain-experimental==0.3.2) (3.3.0)\n",
      "Requirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.11/site-packages (from requests<3,>=2->langchain-community<0.4.0,>=0.3.0->langchain-experimental==0.3.2) (3.4)\n",
      "Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/conda/lib/python3.11/site-packages (from requests<3,>=2->langchain-community<0.4.0,>=0.3.0->langchain-experimental==0.3.2) (2.0.7)\n",
      "Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.11/site-packages (from requests<3,>=2->langchain-community<0.4.0,>=0.3.0->langchain-experimental==0.3.2) (2024.8.30)\n",
      "Requirement already satisfied: greenlet!=0.4.17 in /opt/conda/lib/python3.11/site-packages (from SQLAlchemy<3,>=1.4->langchain-community<0.4.0,>=0.3.0->langchain-experimental==0.3.2) (3.0.0)\n",
      "Requirement already satisfied: anyio in /home/jovyan/.local/lib/python3.11/site-packages (from httpx<1,>=0.23.0->langsmith<0.2.0,>=0.1.125->langchain-community<0.4.0,>=0.3.0->langchain-experimental==0.3.2) (3.7.1)\n",
      "Requirement already satisfied: httpcore==1.* in /home/jovyan/.local/lib/python3.11/site-packages (from httpx<1,>=0.23.0->langsmith<0.2.0,>=0.1.125->langchain-community<0.4.0,>=0.3.0->langchain-experimental==0.3.2) (1.0.5)\n",
      "Requirement already satisfied: sniffio in /opt/conda/lib/python3.11/site-packages (from httpx<1,>=0.23.0->langsmith<0.2.0,>=0.1.125->langchain-community<0.4.0,>=0.3.0->langchain-experimental==0.3.2) (1.3.0)\n",
      "Requirement already satisfied: h11<0.15,>=0.13 in /home/jovyan/.local/lib/python3.11/site-packages (from httpcore==1.*->httpx<1,>=0.23.0->langsmith<0.2.0,>=0.1.125->langchain-community<0.4.0,>=0.3.0->langchain-experimental==0.3.2) (0.14.0)\n",
      "Requirement already satisfied: mypy-extensions>=0.3.0 in /home/jovyan/.local/lib/python3.11/site-packages (from typing-inspect<1,>=0.4.0->dataclasses-json<0.7,>=0.5.7->langchain-community<0.4.0,>=0.3.0->langchain-experimental==0.3.2) (1.0.0)\n",
      "Using cached langchain_experimental-0.3.2-py3-none-any.whl (208 kB)\n",
      "Using cached langchain_community-0.3.1-py3-none-any.whl (2.4 MB)\n",
      "Using cached langchain-0.3.1-py3-none-any.whl (1.0 MB)\n",
      "Installing collected packages: langchain, langchain-community, langchain-experimental\n",
      "Successfully installed langchain-0.3.1 langchain-community-0.3.1 langchain-experimental-0.3.2\n",
      "Note: you may need to restart the kernel to use updated packages.\n",
      "Requirement already satisfied: langchain-community==0.3.1 in /opt/conda/lib/python3.11/site-packages (0.3.1)\n",
      "Requirement already satisfied: PyYAML>=5.3 in /opt/conda/lib/python3.11/site-packages (from langchain-community==0.3.1) (6.0.1)\n",
      "Requirement already satisfied: SQLAlchemy<3,>=1.4 in /opt/conda/lib/python3.11/site-packages (from langchain-community==0.3.1) (2.0.22)\n",
      "Requirement already satisfied: aiohttp<4.0.0,>=3.8.3 in /home/jovyan/.local/lib/python3.11/site-packages (from langchain-community==0.3.1) (3.9.5)\n",
      "Requirement already satisfied: dataclasses-json<0.7,>=0.5.7 in /home/jovyan/.local/lib/python3.11/site-packages (from langchain-community==0.3.1) (0.6.6)\n",
      "Requirement already satisfied: langchain<0.4.0,>=0.3.1 in /opt/conda/lib/python3.11/site-packages (from langchain-community==0.3.1) (0.3.1)\n",
      "Requirement already satisfied: langchain-core<0.4.0,>=0.3.6 in /opt/conda/lib/python3.11/site-packages (from langchain-community==0.3.1) (0.3.6)\n",
      "Requirement already satisfied: langsmith<0.2.0,>=0.1.125 in /opt/conda/lib/python3.11/site-packages (from langchain-community==0.3.1) (0.1.129)\n",
      "Requirement already satisfied: numpy<2,>=1 in /home/jovyan/.local/lib/python3.11/site-packages (from langchain-community==0.3.1) (1.26.4)\n",
      "Requirement already satisfied: pydantic-settings<3.0.0,>=2.4.0 in /opt/conda/lib/python3.11/site-packages (from langchain-community==0.3.1) (2.5.2)\n",
      "Requirement already satisfied: requests<3,>=2 in /opt/conda/lib/python3.11/site-packages (from langchain-community==0.3.1) (2.31.0)\n",
      "Requirement already satisfied: tenacity!=8.4.0,<9.0.0,>=8.1.0 in /home/jovyan/.local/lib/python3.11/site-packages (from langchain-community==0.3.1) (8.3.0)\n",
      "Requirement already satisfied: aiosignal>=1.1.2 in /home/jovyan/.local/lib/python3.11/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community==0.3.1) (1.3.1)\n",
      "Requirement already satisfied: attrs>=17.3.0 in /opt/conda/lib/python3.11/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community==0.3.1) (23.1.0)\n",
      "Requirement already satisfied: frozenlist>=1.1.1 in /home/jovyan/.local/lib/python3.11/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community==0.3.1) (1.4.1)\n",
      "Requirement already satisfied: multidict<7.0,>=4.5 in /home/jovyan/.local/lib/python3.11/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community==0.3.1) (6.0.5)\n",
      "Requirement already satisfied: yarl<2.0,>=1.0 in /home/jovyan/.local/lib/python3.11/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community==0.3.1) (1.9.4)\n",
      "Requirement already satisfied: marshmallow<4.0.0,>=3.18.0 in /home/jovyan/.local/lib/python3.11/site-packages (from dataclasses-json<0.7,>=0.5.7->langchain-community==0.3.1) (3.21.2)\n",
      "Requirement already satisfied: typing-inspect<1,>=0.4.0 in /home/jovyan/.local/lib/python3.11/site-packages (from dataclasses-json<0.7,>=0.5.7->langchain-community==0.3.1) (0.9.0)\n",
      "Requirement already satisfied: langchain-text-splitters<0.4.0,>=0.3.0 in /opt/conda/lib/python3.11/site-packages (from langchain<0.4.0,>=0.3.1->langchain-community==0.3.1) (0.3.0)\n",
      "Requirement already satisfied: pydantic<3.0.0,>=2.7.4 in /opt/conda/lib/python3.11/site-packages (from langchain<0.4.0,>=0.3.1->langchain-community==0.3.1) (2.9.2)\n",
      "Requirement already satisfied: jsonpatch<2.0,>=1.33 in /opt/conda/lib/python3.11/site-packages (from langchain-core<0.4.0,>=0.3.6->langchain-community==0.3.1) (1.33)\n",
      "Requirement already satisfied: packaging<25,>=23.2 in /opt/conda/lib/python3.11/site-packages (from langchain-core<0.4.0,>=0.3.6->langchain-community==0.3.1) (23.2)\n",
      "Requirement already satisfied: typing-extensions>=4.7 in /opt/conda/lib/python3.11/site-packages (from langchain-core<0.4.0,>=0.3.6->langchain-community==0.3.1) (4.12.2)\n",
      "Requirement already satisfied: httpx<1,>=0.23.0 in /home/jovyan/.local/lib/python3.11/site-packages (from langsmith<0.2.0,>=0.1.125->langchain-community==0.3.1) (0.27.0)\n",
      "Requirement already satisfied: orjson<4.0.0,>=3.9.14 in /home/jovyan/.local/lib/python3.11/site-packages (from langsmith<0.2.0,>=0.1.125->langchain-community==0.3.1) (3.10.3)\n",
      "Requirement already satisfied: python-dotenv>=0.21.0 in /home/jovyan/.local/lib/python3.11/site-packages (from pydantic-settings<3.0.0,>=2.4.0->langchain-community==0.3.1) (1.0.1)\n",
      "Requirement already satisfied: charset-normalizer<4,>=2 in /opt/conda/lib/python3.11/site-packages (from requests<3,>=2->langchain-community==0.3.1) (3.3.0)\n",
      "Requirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.11/site-packages (from requests<3,>=2->langchain-community==0.3.1) (3.4)\n",
      "Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/conda/lib/python3.11/site-packages (from requests<3,>=2->langchain-community==0.3.1) (2.0.7)\n",
      "Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.11/site-packages (from requests<3,>=2->langchain-community==0.3.1) (2024.8.30)\n",
      "Requirement already satisfied: greenlet!=0.4.17 in /opt/conda/lib/python3.11/site-packages (from SQLAlchemy<3,>=1.4->langchain-community==0.3.1) (3.0.0)\n",
      "Requirement already satisfied: anyio in /home/jovyan/.local/lib/python3.11/site-packages (from httpx<1,>=0.23.0->langsmith<0.2.0,>=0.1.125->langchain-community==0.3.1) (3.7.1)\n",
      "Requirement already satisfied: httpcore==1.* in /home/jovyan/.local/lib/python3.11/site-packages (from httpx<1,>=0.23.0->langsmith<0.2.0,>=0.1.125->langchain-community==0.3.1) (1.0.5)\n",
      "Requirement already satisfied: sniffio in /opt/conda/lib/python3.11/site-packages (from httpx<1,>=0.23.0->langsmith<0.2.0,>=0.1.125->langchain-community==0.3.1) (1.3.0)\n",
      "Requirement already satisfied: h11<0.15,>=0.13 in /home/jovyan/.local/lib/python3.11/site-packages (from httpcore==1.*->httpx<1,>=0.23.0->langsmith<0.2.0,>=0.1.125->langchain-community==0.3.1) (0.14.0)\n",
      "Requirement already satisfied: jsonpointer>=1.9 in /opt/conda/lib/python3.11/site-packages (from jsonpatch<2.0,>=1.33->langchain-core<0.4.0,>=0.3.6->langchain-community==0.3.1) (2.4)\n",
      "Requirement already satisfied: annotated-types>=0.6.0 in /home/jovyan/.local/lib/python3.11/site-packages (from pydantic<3.0.0,>=2.7.4->langchain<0.4.0,>=0.3.1->langchain-community==0.3.1) (0.6.0)\n",
      "Requirement already satisfied: pydantic-core==2.23.4 in /opt/conda/lib/python3.11/site-packages (from pydantic<3.0.0,>=2.7.4->langchain<0.4.0,>=0.3.1->langchain-community==0.3.1) (2.23.4)\n",
      "Requirement already satisfied: mypy-extensions>=0.3.0 in /home/jovyan/.local/lib/python3.11/site-packages (from typing-inspect<1,>=0.4.0->dataclasses-json<0.7,>=0.5.7->langchain-community==0.3.1) (1.0.0)\n",
      "Note: you may need to restart the kernel to use updated packages.\n",
      "Requirement already satisfied: langchain==0.3.1 in /opt/conda/lib/python3.11/site-packages (0.3.1)\n",
      "Requirement already satisfied: PyYAML>=5.3 in /opt/conda/lib/python3.11/site-packages (from langchain==0.3.1) (6.0.1)\n",
      "Requirement already satisfied: SQLAlchemy<3,>=1.4 in /opt/conda/lib/python3.11/site-packages (from langchain==0.3.1) (2.0.22)\n",
      "Requirement already satisfied: aiohttp<4.0.0,>=3.8.3 in /home/jovyan/.local/lib/python3.11/site-packages (from langchain==0.3.1) (3.9.5)\n",
      "Requirement already satisfied: langchain-core<0.4.0,>=0.3.6 in /opt/conda/lib/python3.11/site-packages (from langchain==0.3.1) (0.3.6)\n",
      "Requirement already satisfied: langchain-text-splitters<0.4.0,>=0.3.0 in /opt/conda/lib/python3.11/site-packages (from langchain==0.3.1) (0.3.0)\n",
      "Requirement already satisfied: langsmith<0.2.0,>=0.1.17 in /opt/conda/lib/python3.11/site-packages (from langchain==0.3.1) (0.1.129)\n",
      "Requirement already satisfied: numpy<2,>=1 in /home/jovyan/.local/lib/python3.11/site-packages (from langchain==0.3.1) (1.26.4)\n",
      "Requirement already satisfied: pydantic<3.0.0,>=2.7.4 in /opt/conda/lib/python3.11/site-packages (from langchain==0.3.1) (2.9.2)\n",
      "Requirement already satisfied: requests<3,>=2 in /opt/conda/lib/python3.11/site-packages (from langchain==0.3.1) (2.31.0)\n",
      "Requirement already satisfied: tenacity!=8.4.0,<9.0.0,>=8.1.0 in /home/jovyan/.local/lib/python3.11/site-packages (from langchain==0.3.1) (8.3.0)\n",
      "Requirement already satisfied: aiosignal>=1.1.2 in /home/jovyan/.local/lib/python3.11/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain==0.3.1) (1.3.1)\n",
      "Requirement already satisfied: attrs>=17.3.0 in /opt/conda/lib/python3.11/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain==0.3.1) (23.1.0)\n",
      "Requirement already satisfied: frozenlist>=1.1.1 in /home/jovyan/.local/lib/python3.11/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain==0.3.1) (1.4.1)\n",
      "Requirement already satisfied: multidict<7.0,>=4.5 in /home/jovyan/.local/lib/python3.11/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain==0.3.1) (6.0.5)\n",
      "Requirement already satisfied: yarl<2.0,>=1.0 in /home/jovyan/.local/lib/python3.11/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain==0.3.1) (1.9.4)\n",
      "Requirement already satisfied: jsonpatch<2.0,>=1.33 in /opt/conda/lib/python3.11/site-packages (from langchain-core<0.4.0,>=0.3.6->langchain==0.3.1) (1.33)\n",
      "Requirement already satisfied: packaging<25,>=23.2 in /opt/conda/lib/python3.11/site-packages (from langchain-core<0.4.0,>=0.3.6->langchain==0.3.1) (23.2)\n",
      "Requirement already satisfied: typing-extensions>=4.7 in /opt/conda/lib/python3.11/site-packages (from langchain-core<0.4.0,>=0.3.6->langchain==0.3.1) (4.12.2)\n",
      "Requirement already satisfied: httpx<1,>=0.23.0 in /home/jovyan/.local/lib/python3.11/site-packages (from langsmith<0.2.0,>=0.1.17->langchain==0.3.1) (0.27.0)\n",
      "Requirement already satisfied: orjson<4.0.0,>=3.9.14 in /home/jovyan/.local/lib/python3.11/site-packages (from langsmith<0.2.0,>=0.1.17->langchain==0.3.1) (3.10.3)\n",
      "Requirement already satisfied: annotated-types>=0.6.0 in /home/jovyan/.local/lib/python3.11/site-packages (from pydantic<3.0.0,>=2.7.4->langchain==0.3.1) (0.6.0)\n",
      "Requirement already satisfied: pydantic-core==2.23.4 in /opt/conda/lib/python3.11/site-packages (from pydantic<3.0.0,>=2.7.4->langchain==0.3.1) (2.23.4)\n",
      "Requirement already satisfied: charset-normalizer<4,>=2 in /opt/conda/lib/python3.11/site-packages (from requests<3,>=2->langchain==0.3.1) (3.3.0)\n",
      "Requirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.11/site-packages (from requests<3,>=2->langchain==0.3.1) (3.4)\n",
      "Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/conda/lib/python3.11/site-packages (from requests<3,>=2->langchain==0.3.1) (2.0.7)\n",
      "Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.11/site-packages (from requests<3,>=2->langchain==0.3.1) (2024.8.30)\n",
      "Requirement already satisfied: greenlet!=0.4.17 in /opt/conda/lib/python3.11/site-packages (from SQLAlchemy<3,>=1.4->langchain==0.3.1) (3.0.0)\n",
      "Requirement already satisfied: anyio in /home/jovyan/.local/lib/python3.11/site-packages (from httpx<1,>=0.23.0->langsmith<0.2.0,>=0.1.17->langchain==0.3.1) (3.7.1)\n",
      "Requirement already satisfied: httpcore==1.* in /home/jovyan/.local/lib/python3.11/site-packages (from httpx<1,>=0.23.0->langsmith<0.2.0,>=0.1.17->langchain==0.3.1) (1.0.5)\n",
      "Requirement already satisfied: sniffio in /opt/conda/lib/python3.11/site-packages (from httpx<1,>=0.23.0->langsmith<0.2.0,>=0.1.17->langchain==0.3.1) (1.3.0)\n",
      "Requirement already satisfied: h11<0.15,>=0.13 in /home/jovyan/.local/lib/python3.11/site-packages (from httpcore==1.*->httpx<1,>=0.23.0->langsmith<0.2.0,>=0.1.17->langchain==0.3.1) (0.14.0)\n",
      "Requirement already satisfied: jsonpointer>=1.9 in /opt/conda/lib/python3.11/site-packages (from jsonpatch<2.0,>=1.33->langchain-core<0.4.0,>=0.3.6->langchain==0.3.1) (2.4)\n",
      "Note: you may need to restart the kernel to use updated packages.\n",
      "Collecting chromadb==0.5.11\n",
      "  Using cached chromadb-0.5.11-py3-none-any.whl.metadata (6.8 kB)\n",
      "Requirement already satisfied: build>=1.0.3 in /opt/conda/lib/python3.11/site-packages (from chromadb==0.5.11) (1.2.1)\n",
      "Requirement already satisfied: pydantic>=1.9 in /opt/conda/lib/python3.11/site-packages (from chromadb==0.5.11) (2.9.2)\n",
      "Requirement already satisfied: chroma-hnswlib==0.7.6 in /opt/conda/lib/python3.11/site-packages (from chromadb==0.5.11) (0.7.6)\n",
      "Requirement already satisfied: fastapi>=0.95.2 in /opt/conda/lib/python3.11/site-packages (from chromadb==0.5.11) (0.113.0)\n",
      "Requirement already satisfied: uvicorn>=0.18.3 in /opt/conda/lib/python3.11/site-packages (from uvicorn[standard]>=0.18.3->chromadb==0.5.11) (0.30.6)\n",
      "Requirement already satisfied: numpy>=1.22.5 in /home/jovyan/.local/lib/python3.11/site-packages (from chromadb==0.5.11) (1.26.4)\n",
      "Requirement already satisfied: posthog>=2.4.0 in /opt/conda/lib/python3.11/site-packages (from chromadb==0.5.11) (3.6.3)\n",
      "Requirement already satisfied: typing-extensions>=4.5.0 in /opt/conda/lib/python3.11/site-packages (from chromadb==0.5.11) (4.12.2)\n",
      "Requirement already satisfied: onnxruntime>=1.14.1 in /opt/conda/lib/python3.11/site-packages (from chromadb==0.5.11) (1.19.2)\n",
      "Requirement already satisfied: opentelemetry-api>=1.2.0 in /opt/conda/lib/python3.11/site-packages (from chromadb==0.5.11) (1.27.0)\n",
      "Requirement already satisfied: opentelemetry-exporter-otlp-proto-grpc>=1.2.0 in /opt/conda/lib/python3.11/site-packages (from chromadb==0.5.11) (1.27.0)\n",
      "Requirement already satisfied: opentelemetry-instrumentation-fastapi>=0.41b0 in /opt/conda/lib/python3.11/site-packages (from chromadb==0.5.11) (0.48b0)\n",
      "Requirement already satisfied: opentelemetry-sdk>=1.2.0 in /opt/conda/lib/python3.11/site-packages (from chromadb==0.5.11) (1.27.0)\n",
      "Requirement already satisfied: tokenizers>=0.13.2 in /home/jovyan/.local/lib/python3.11/site-packages (from chromadb==0.5.11) (0.19.1)\n",
      "Requirement already satisfied: pypika>=0.48.9 in /opt/conda/lib/python3.11/site-packages (from chromadb==0.5.11) (0.48.9)\n",
      "Requirement already satisfied: tqdm>=4.65.0 in /opt/conda/lib/python3.11/site-packages (from chromadb==0.5.11) (4.66.1)\n",
      "Requirement already satisfied: overrides>=7.3.1 in /opt/conda/lib/python3.11/site-packages (from chromadb==0.5.11) (7.4.0)\n",
      "Requirement already satisfied: importlib-resources in /opt/conda/lib/python3.11/site-packages (from chromadb==0.5.11) (6.1.0)\n",
      "Requirement already satisfied: grpcio>=1.58.0 in /opt/conda/lib/python3.11/site-packages (from chromadb==0.5.11) (1.66.1)\n",
      "Requirement already satisfied: bcrypt>=4.0.1 in /opt/conda/lib/python3.11/site-packages (from chromadb==0.5.11) (4.2.0)\n",
      "Requirement already satisfied: typer>=0.9.0 in /opt/conda/lib/python3.11/site-packages (from chromadb==0.5.11) (0.12.5)\n",
      "Requirement already satisfied: kubernetes>=28.1.0 in /opt/conda/lib/python3.11/site-packages (from chromadb==0.5.11) (30.1.0)\n",
      "Requirement already satisfied: tenacity>=8.2.3 in /home/jovyan/.local/lib/python3.11/site-packages (from chromadb==0.5.11) (8.3.0)\n",
      "Requirement already satisfied: PyYAML>=6.0.0 in /opt/conda/lib/python3.11/site-packages (from chromadb==0.5.11) (6.0.1)\n",
      "Requirement already satisfied: mmh3>=4.0.1 in /opt/conda/lib/python3.11/site-packages (from chromadb==0.5.11) (4.1.0)\n",
      "Requirement already satisfied: orjson>=3.9.12 in /home/jovyan/.local/lib/python3.11/site-packages (from chromadb==0.5.11) (3.10.3)\n",
      "Requirement already satisfied: httpx>=0.27.0 in /home/jovyan/.local/lib/python3.11/site-packages (from chromadb==0.5.11) (0.27.0)\n",
      "Requirement already satisfied: rich>=10.11.0 in /opt/conda/lib/python3.11/site-packages (from chromadb==0.5.11) (13.8.0)\n",
      "Requirement already satisfied: packaging>=19.1 in /opt/conda/lib/python3.11/site-packages (from build>=1.0.3->chromadb==0.5.11) (23.2)\n",
      "Requirement already satisfied: pyproject_hooks in /opt/conda/lib/python3.11/site-packages (from build>=1.0.3->chromadb==0.5.11) (1.1.0)\n",
      "Requirement already satisfied: starlette<0.39.0,>=0.37.2 in /opt/conda/lib/python3.11/site-packages (from fastapi>=0.95.2->chromadb==0.5.11) (0.38.4)\n",
      "Requirement already satisfied: anyio in /home/jovyan/.local/lib/python3.11/site-packages (from httpx>=0.27.0->chromadb==0.5.11) (3.7.1)\n",
      "Requirement already satisfied: certifi in /opt/conda/lib/python3.11/site-packages (from httpx>=0.27.0->chromadb==0.5.11) (2024.8.30)\n",
      "Requirement already satisfied: httpcore==1.* in /home/jovyan/.local/lib/python3.11/site-packages (from httpx>=0.27.0->chromadb==0.5.11) (1.0.5)\n",
      "Requirement already satisfied: idna in /opt/conda/lib/python3.11/site-packages (from httpx>=0.27.0->chromadb==0.5.11) (3.4)\n",
      "Requirement already satisfied: sniffio in /opt/conda/lib/python3.11/site-packages (from httpx>=0.27.0->chromadb==0.5.11) (1.3.0)\n",
      "Requirement already satisfied: h11<0.15,>=0.13 in /home/jovyan/.local/lib/python3.11/site-packages (from httpcore==1.*->httpx>=0.27.0->chromadb==0.5.11) (0.14.0)\n",
      "Requirement already satisfied: six>=1.9.0 in /opt/conda/lib/python3.11/site-packages (from kubernetes>=28.1.0->chromadb==0.5.11) (1.16.0)\n",
      "Requirement already satisfied: python-dateutil>=2.5.3 in /opt/conda/lib/python3.11/site-packages (from kubernetes>=28.1.0->chromadb==0.5.11) (2.8.2)\n",
      "Requirement already satisfied: google-auth>=1.0.1 in /opt/conda/lib/python3.11/site-packages (from kubernetes>=28.1.0->chromadb==0.5.11) (2.34.0)\n",
      "Requirement already satisfied: websocket-client!=0.40.0,!=0.41.*,!=0.42.*,>=0.32.0 in /opt/conda/lib/python3.11/site-packages (from kubernetes>=28.1.0->chromadb==0.5.11) (1.6.4)\n",
      "Requirement already satisfied: requests in /opt/conda/lib/python3.11/site-packages (from kubernetes>=28.1.0->chromadb==0.5.11) (2.31.0)\n",
      "Requirement already satisfied: requests-oauthlib in /opt/conda/lib/python3.11/site-packages (from kubernetes>=28.1.0->chromadb==0.5.11) (2.0.0)\n",
      "Requirement already satisfied: oauthlib>=3.2.2 in /opt/conda/lib/python3.11/site-packages (from kubernetes>=28.1.0->chromadb==0.5.11) (3.2.2)\n",
      "Requirement already satisfied: urllib3>=1.24.2 in /opt/conda/lib/python3.11/site-packages (from kubernetes>=28.1.0->chromadb==0.5.11) (2.0.7)\n",
      "Requirement already satisfied: coloredlogs in /opt/conda/lib/python3.11/site-packages (from onnxruntime>=1.14.1->chromadb==0.5.11) (15.0.1)\n",
      "Requirement already satisfied: flatbuffers in /opt/conda/lib/python3.11/site-packages (from onnxruntime>=1.14.1->chromadb==0.5.11) (24.3.25)\n",
      "Requirement already satisfied: protobuf in /opt/conda/lib/python3.11/site-packages (from onnxruntime>=1.14.1->chromadb==0.5.11) (4.25.4)\n",
      "Requirement already satisfied: sympy in /opt/conda/lib/python3.11/site-packages (from onnxruntime>=1.14.1->chromadb==0.5.11) (1.13.2)\n",
      "Requirement already satisfied: deprecated>=1.2.6 in /opt/conda/lib/python3.11/site-packages (from opentelemetry-api>=1.2.0->chromadb==0.5.11) (1.2.14)\n",
      "Requirement already satisfied: importlib-metadata<=8.4.0,>=6.0 in /opt/conda/lib/python3.11/site-packages (from opentelemetry-api>=1.2.0->chromadb==0.5.11) (6.8.0)\n",
      "Requirement already satisfied: googleapis-common-protos~=1.52 in /opt/conda/lib/python3.11/site-packages (from opentelemetry-exporter-otlp-proto-grpc>=1.2.0->chromadb==0.5.11) (1.65.0)\n",
      "Requirement already satisfied: opentelemetry-exporter-otlp-proto-common==1.27.0 in /opt/conda/lib/python3.11/site-packages (from opentelemetry-exporter-otlp-proto-grpc>=1.2.0->chromadb==0.5.11) (1.27.0)\n",
      "Requirement already satisfied: opentelemetry-proto==1.27.0 in /opt/conda/lib/python3.11/site-packages (from opentelemetry-exporter-otlp-proto-grpc>=1.2.0->chromadb==0.5.11) (1.27.0)\n",
      "Requirement already satisfied: opentelemetry-instrumentation-asgi==0.48b0 in /opt/conda/lib/python3.11/site-packages (from opentelemetry-instrumentation-fastapi>=0.41b0->chromadb==0.5.11) (0.48b0)\n",
      "Requirement already satisfied: opentelemetry-instrumentation==0.48b0 in /opt/conda/lib/python3.11/site-packages (from opentelemetry-instrumentation-fastapi>=0.41b0->chromadb==0.5.11) (0.48b0)\n",
      "Requirement already satisfied: opentelemetry-semantic-conventions==0.48b0 in /opt/conda/lib/python3.11/site-packages (from opentelemetry-instrumentation-fastapi>=0.41b0->chromadb==0.5.11) (0.48b0)\n",
      "Requirement already satisfied: opentelemetry-util-http==0.48b0 in /opt/conda/lib/python3.11/site-packages (from opentelemetry-instrumentation-fastapi>=0.41b0->chromadb==0.5.11) (0.48b0)\n",
      "Requirement already satisfied: setuptools>=16.0 in /opt/conda/lib/python3.11/site-packages (from opentelemetry-instrumentation==0.48b0->opentelemetry-instrumentation-fastapi>=0.41b0->chromadb==0.5.11) (68.2.2)\n",
      "Requirement already satisfied: wrapt<2.0.0,>=1.0.0 in /opt/conda/lib/python3.11/site-packages (from opentelemetry-instrumentation==0.48b0->opentelemetry-instrumentation-fastapi>=0.41b0->chromadb==0.5.11) (1.16.0)\n",
      "Requirement already satisfied: asgiref~=3.0 in /opt/conda/lib/python3.11/site-packages (from opentelemetry-instrumentation-asgi==0.48b0->opentelemetry-instrumentation-fastapi>=0.41b0->chromadb==0.5.11) (3.8.1)\n",
      "Requirement already satisfied: monotonic>=1.5 in /opt/conda/lib/python3.11/site-packages (from posthog>=2.4.0->chromadb==0.5.11) (1.6)\n",
      "Requirement already satisfied: backoff>=1.10.0 in /opt/conda/lib/python3.11/site-packages (from posthog>=2.4.0->chromadb==0.5.11) (2.2.1)\n",
      "Requirement already satisfied: annotated-types>=0.6.0 in /home/jovyan/.local/lib/python3.11/site-packages (from pydantic>=1.9->chromadb==0.5.11) (0.6.0)\n",
      "Requirement already satisfied: pydantic-core==2.23.4 in /opt/conda/lib/python3.11/site-packages (from pydantic>=1.9->chromadb==0.5.11) (2.23.4)\n",
      "Requirement already satisfied: markdown-it-py>=2.2.0 in /opt/conda/lib/python3.11/site-packages (from rich>=10.11.0->chromadb==0.5.11) (3.0.0)\n",
      "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /opt/conda/lib/python3.11/site-packages (from rich>=10.11.0->chromadb==0.5.11) (2.16.1)\n",
      "Requirement already satisfied: huggingface-hub<1.0,>=0.16.4 in /opt/conda/lib/python3.11/site-packages (from tokenizers>=0.13.2->chromadb==0.5.11) (0.24.6)\n",
      "Requirement already satisfied: click>=8.0.0 in /opt/conda/lib/python3.11/site-packages (from typer>=0.9.0->chromadb==0.5.11) (8.1.7)\n",
      "Requirement already satisfied: shellingham>=1.3.0 in /opt/conda/lib/python3.11/site-packages (from typer>=0.9.0->chromadb==0.5.11) (1.5.4)\n",
      "Requirement already satisfied: httptools>=0.5.0 in /opt/conda/lib/python3.11/site-packages (from uvicorn[standard]>=0.18.3->chromadb==0.5.11) (0.6.1)\n",
      "Requirement already satisfied: python-dotenv>=0.13 in /home/jovyan/.local/lib/python3.11/site-packages (from uvicorn[standard]>=0.18.3->chromadb==0.5.11) (1.0.1)\n",
      "Requirement already satisfied: uvloop!=0.15.0,!=0.15.1,>=0.14.0 in /opt/conda/lib/python3.11/site-packages (from uvicorn[standard]>=0.18.3->chromadb==0.5.11) (0.20.0)\n",
      "Requirement already satisfied: watchfiles>=0.13 in /opt/conda/lib/python3.11/site-packages (from uvicorn[standard]>=0.18.3->chromadb==0.5.11) (0.24.0)\n",
      "Requirement already satisfied: websockets>=10.4 in /home/jovyan/.local/lib/python3.11/site-packages (from uvicorn[standard]>=0.18.3->chromadb==0.5.11) (11.0.3)\n",
      "Requirement already satisfied: cachetools<6.0,>=2.0.0 in /opt/conda/lib/python3.11/site-packages (from google-auth>=1.0.1->kubernetes>=28.1.0->chromadb==0.5.11) (5.5.0)\n",
      "Requirement already satisfied: pyasn1-modules>=0.2.1 in /opt/conda/lib/python3.11/site-packages (from google-auth>=1.0.1->kubernetes>=28.1.0->chromadb==0.5.11) (0.4.0)\n",
      "Requirement already satisfied: rsa<5,>=3.1.4 in /opt/conda/lib/python3.11/site-packages (from google-auth>=1.0.1->kubernetes>=28.1.0->chromadb==0.5.11) (4.9)\n",
      "Requirement already satisfied: filelock in /opt/conda/lib/python3.11/site-packages (from huggingface-hub<1.0,>=0.16.4->tokenizers>=0.13.2->chromadb==0.5.11) (3.15.4)\n",
      "Requirement already satisfied: fsspec>=2023.5.0 in /home/jovyan/.local/lib/python3.11/site-packages (from huggingface-hub<1.0,>=0.16.4->tokenizers>=0.13.2->chromadb==0.5.11) (2024.3.1)\n",
      "Requirement already satisfied: zipp>=0.5 in /opt/conda/lib/python3.11/site-packages (from importlib-metadata<=8.4.0,>=6.0->opentelemetry-api>=1.2.0->chromadb==0.5.11) (3.17.0)\n",
      "Requirement already satisfied: mdurl~=0.1 in /opt/conda/lib/python3.11/site-packages (from markdown-it-py>=2.2.0->rich>=10.11.0->chromadb==0.5.11) (0.1.2)\n",
      "Requirement already satisfied: charset-normalizer<4,>=2 in /opt/conda/lib/python3.11/site-packages (from requests->kubernetes>=28.1.0->chromadb==0.5.11) (3.3.0)\n",
      "Requirement already satisfied: humanfriendly>=9.1 in /opt/conda/lib/python3.11/site-packages (from coloredlogs->onnxruntime>=1.14.1->chromadb==0.5.11) (10.0)\n",
      "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /opt/conda/lib/python3.11/site-packages (from sympy->onnxruntime>=1.14.1->chromadb==0.5.11) (1.3.0)\n",
      "Requirement already satisfied: pyasn1<0.7.0,>=0.4.6 in /opt/conda/lib/python3.11/site-packages (from pyasn1-modules>=0.2.1->google-auth>=1.0.1->kubernetes>=28.1.0->chromadb==0.5.11) (0.6.0)\n",
      "Using cached chromadb-0.5.11-py3-none-any.whl (603 kB)\n",
      "Installing collected packages: chromadb\n",
      "Successfully installed chromadb-0.5.11\n",
      "Note: you may need to restart the kernel to use updated packages.\n",
      "Collecting beautifulsoup4==4.12.3\n",
      "  Using cached beautifulsoup4-4.12.3-py3-none-any.whl.metadata (3.8 kB)\n",
      "Requirement already satisfied: soupsieve>1.2 in /opt/conda/lib/python3.11/site-packages (from beautifulsoup4==4.12.3) (2.5)\n",
      "Using cached beautifulsoup4-4.12.3-py3-none-any.whl (147 kB)\n",
      "Installing collected packages: beautifulsoup4\n",
      "Successfully installed beautifulsoup4-4.12.3\n",
      "Note: you may need to restart the kernel to use updated packages.\n"
     ]
    }
   ],
   "source": [
    "%pip install --upgrade pip\n",
    "\n",
    "# Uninstall any previously installed versions so the pinned versions below\n",
    "# are installed cleanly (each package is listed exactly once)\n",
    "%pip uninstall -y langchain-core langchain-openai langchain-experimental beautifulsoup4 langchain-community langchain chromadb\n",
    "\n",
    "# Install compatible, pinned versions of the LangChain packages\n",
    "%pip install langchain-core==0.3.6\n",
    "%pip install langchain-openai==0.2.1\n",
    "%pip install langchain-experimental==0.3.2\n",
    "%pip install langchain-community==0.3.1\n",
    "%pip install langchain==0.3.1\n",
    "\n",
    "# Install remaining packages\n",
    "%pip install chromadb==0.5.11\n",
    "%pip install beautifulsoup4==4.12.3\n",
    "\n",
    "# Restart the kernel after installation so the newly installed versions are the ones imported"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "f884314f-870c-4bfb-b6c1-a5b4801ec172",
   "metadata": {},
   "outputs": [],
   "source": [
    "import getpass\n",
    "import os\n",
    "os.environ['USER_AGENT'] = 'RAGUserAgent'\n",
    "from langchain_community.document_loaders import WebBaseLoader\n",
    "import bs4\n",
    "import openai\n",
    "from langchain_openai import ChatOpenAI, OpenAIEmbeddings\n",
    "from langchain import hub\n",
    "from langchain_core.output_parsers import StrOutputParser\n",
    "from langchain_core.runnables import RunnablePassthrough\n",
    "import chromadb\n",
    "from langchain_community.vectorstores import Chroma\n",
    "from langchain_experimental.text_splitter import SemanticChunker\n",
    "\n",
    "# new\n",
    "from langchain_core.runnables import RunnableParallel"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "721241b4-32ab-476a-a5ac-9feab48459e5",
   "metadata": {},
   "outputs": [],
   "source": [
    "# variables\n",
    "# Keep an OPENAI_API_KEY that is already set in the environment; only when it is\n",
    "# missing or empty, prompt for it without echoing, so the secret is never typed\n",
    "# into (or saved with) the notebook source.\n",
    "if not os.environ.get('OPENAI_API_KEY'):\n",
    "    os.environ['OPENAI_API_KEY'] = getpass.getpass('Enter your OpenAI API key: ')\n",
    "openai.api_key = os.environ['OPENAI_API_KEY']\n",
    "embedding_function = OpenAIEmbeddings()\n",
    "llm = ChatOpenAI(model_name=\"gpt-4o-mini\", temperature=0)\n",
    "str_output_parser = StrOutputParser()\n",
    "user_query = \"What are the advantages of using RAG?\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d3ad428a-3eb6-40ec-a1a5-62565ead1e5b",
   "metadata": {},
   "outputs": [],
   "source": [
    "#### INDEXING ####"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "98ccda2c-0f4c-41c5-804d-2227cdf35aa7",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the page, parsing only elements whose CSS class is post-content,\n",
    "# post-title or post-header\n",
    "loader = WebBaseLoader(\n",
    "    web_paths=(\"https://kbourne.github.io/chapter1.html\",),\n",
    "    bs_kwargs={\n",
    "        \"parse_only\": bs4.SoupStrainer(\n",
    "            class_=(\"post-content\", \"post-title\", \"post-header\")\n",
    "        )\n",
    "    },\n",
    ")\n",
    "docs = loader.load()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "927a4c65-aa05-486c-8295-2f99673e7c20",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Split the loaded documents into chunks with SemanticChunker, which is\n",
    "# given the same embedding function used for the vector store\n",
    "text_splitter = SemanticChunker(embeddings=embedding_function)\n",
    "splits = text_splitter.split_documents(docs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "6b13568c-d633-464d-8c43-0d55f34cc8c1",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Embed the chunks into a Chroma vector store, then expose it as a retriever\n",
    "vectorstore = Chroma.from_documents(\n",
    "    documents=splits,\n",
    "    embedding=embedding_function,\n",
    ")\n",
    "\n",
    "retriever = vectorstore.as_retriever()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "6ce8df01-925b-45b5-8fb8-17b5c40c581f",
   "metadata": {},
   "outputs": [],
   "source": [
    "#### RETRIEVAL and GENERATION ####"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "eb47c817-b5ac-4d90-84ee-4cd209e52a80",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/conda/lib/python3.11/site-packages/langsmith/client.py:323: LangSmithMissingAPIKeyWarning: API key must be provided when using hosted LangSmith API\n",
      "  warnings.warn(\n"
     ]
    }
   ],
   "source": [
    "# Prompt - you can ignore the LangSmith API key warning; LangSmith is not needed for this coding exercise\n",
    "prompt = hub.pull(\"jclemens24/rag-prompt\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "e8975479-b3e3-481d-ad7b-08b4eb3faaef",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Post-processing\n",
    "def format_docs(docs):\n",
    "    \"\"\"Return the page_content of every document in docs, joined by a blank line.\"\"\"\n",
    "    page_texts = [doc.page_content for doc in docs]\n",
    "    separator = \"\\n\\n\"\n",
    "    return separator.join(page_texts)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "fd9db713-f705-4b65-800e-2c4e3d0e4ef4",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Generation chain: replace the retrieved documents under the context key with\n",
    "# their formatted text, then pipe through the prompt, the LLM and the string parser\n",
    "rag_chain_from_docs = (\n",
    "    RunnablePassthrough.assign(\n",
    "        context=lambda inputs: format_docs(inputs[\"context\"])\n",
    "    )\n",
    "    | prompt\n",
    "    | llm\n",
    "    | str_output_parser\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "dc5c2ab0-9191-40f7-abf2-681f1c751429",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Run the retriever and pass the question through side by side, then add the\n",
    "# output of rag_chain_from_docs under the answer key\n",
    "rag_chain_with_source = RunnableParallel(\n",
    "    context=retriever,\n",
    "    question=RunnablePassthrough(),\n",
    ").assign(answer=rag_chain_from_docs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "8b30177a-f9ab-45e4-812d-33b0f97325bd",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'context': [Document(metadata={'source': 'https://kbourne.github.io/chapter1.html'}, page_content='Can you imagine what you could do with all of the benefits mentioned above, but combined with all of the data within your company, about everything your company has ever done, about your customers and all of their interactions, or about all of your products and services combined with a knowledge of what a specific customer’s needs are? You do not have to imagine it, that is what RAG does! Even smaller companies are not able to access much of their internal data resources very effectively. Larger companies are swimming in petabytes of data that is not readily accessible or is not being fully utilized. Prior to RAG, most of the services you saw that connected customers or employees with the data resources of the company were really just scratching the surface of what is possible compared to if they could access ALL of the data in the company. With the advent of RAG and generative AI in general, corporations are on the precipice of something really, really big. Comparing RAG with Model Fine-Tuning#\\nEstablished Large Language Models (LLM), what we call the foundation models, can learn in two ways:\\n Fine-tuning - With fine-tuning, you are adjusting the weights and/or biases that define the model\\'s intelligence based on new training data. This directly impacts the model, permanently changing how it will interact with new inputs. Input/Prompts - This is where you actually \"use\" the model, using the prompt/input to introduce new knowledge that the LLM can act upon. Why not use fine-tuning in all situations?'),\n",
       "  Document(metadata={'source': 'https://kbourne.github.io/chapter1.html'}, page_content='Maintaining this integration over time, especially as data sources evolve or expand, adds even more complexity and cost. Organizations need to invest in technical expertise and infrastructure to effectively leverage RAG capabilities while accounting for the rapid increase in complexities these systems bring with them. Potential for Information Overload: \\nIt is very possible for RAG-based systems to pull in too much information. It is just as important to implement mechanisms to address this issue as it is to handle times when not enough relevant information is found. Determining the relevance and importance of retrieved information to be included in the final output requires sophisticated filtering and ranking mechanisms. Without these, the quality of the generated content could be compromised by an excess of unnecessary or marginally relevant details. RAG Vocabulary#\\nNow is as good a time as any to review some vocabulary that should help you get familiar with the various concepts in RAG. This is not an exhaustive list, but understanding these core concepts should help you understand everything else we teach you about RAG in a more effective way:\\nLarge Language Model (LLM)\\nMost of this book will deal with LLMs. LLMs are generative AI technologies that focus on generating text.'),\n",
       "  Document(metadata={'source': 'https://kbourne.github.io/chapter1.html'}, page_content=\"\\n\\n      Introduction to Retrieval Augmented Generation (RAG)\\n    \\nDate: March 10, 2024  |  Estimated Reading Time: 15 min  |  Author: Keith Bourne\\n\\n  In the rapidly evolving field of artificial intelligence, Retrieval-Augmented Generation (RAG) is emerging as a significant addition to the Generative AI toolkit. RAG harnesses the strengths of Large Language Models (LLMs) and integrates them with internal data, offering a method to enhance organizational operations significantly. This book delves into the essential aspects of RAG, examining its role in augmenting the capabilities of LLMs and leveraging internal corporate data for strategic advantage. As it progresses, the book outlines the potential of RAG in business, suggesting how it can make AI applications smarter, more responsive, and aligned with organizational objectives. RAG is positioned as a key facilitator of customized, efficient, and insightful AI solutions, bridging the gap between Generative AI's potential and specific business needs. This exploration of RAG encourages readers to unlock the full potential of their corporate data, paving the way for an era of AI-driven innovation. What You Can Expect to Learn#\\nExpect to launch a comprehensive journey to understand and effectively incorporate Retrieval Augmented Generation (RAG) into AI systems. You'll explore a broad spectrum of essential topics, including vector databases, the vectorization process, vector search techniques, prompt engineering and design, and the use of AI agents for RAG applications, alongside methods for evaluating and visualizing RAG outcomes. Through practical, working code examples utilizing the latest tools and technologies like LangChain and Chroma's vector database, you'll gain hands-on experience in implementing RAG in your projects. 
At the outset, you'll delve into the core principles of RAG, appreciating its significance in the broader landscape of Generative AI. This foundational knowledge equips you with the perspective needed to discern how RAG applications are designed and why they succeed, paving the way for innovative solution development and problem-solving in AI. You'll discover the symbiosis between Large Language Models (LLMs) and internal data to bolster organizational operations. By learning about the intricacies of this integration, particularly the process of vectorization, including the creation and management of vector databases for efficient information retrieval, you'll gain crucial skills for navigating and harnessing vast data landscapes effectively in today's data-driven environments. Gain expertise in vector search techniques, an essential skill set for identifying pertinent data within extensive datasets. Coupled with this, you'll learn strategies for prompt engineering and design, ensuring that you can craft queries that elicit precise and relevant AI responses. Explore how AI agents play a pivotal role in RAG applications, facilitating sophisticated data interaction and retrieval tasks. You'll also learn methods for evaluating and visualizing RAG implementation outcomes, providing a framework for assessing performance and impact critically. Throughout this journey, you'll engage in practical, hands-on learning, guided through the use of cutting-edge tools like LangChain and Chroma's vector database, supported by real, working code examples. These detailed coding demonstrations, grounded in current frameworks, offer a practical foray into implementing RAG in AI systems, providing a rich learning experience. Case studies and coding exercises strategically interspersed throughout your learning path highlight the application of RAG in various real-world scenarios. 
These insights into addressing common and complex challenges prepare you to navigate the application of RAG across diverse settings with confidence. The code will build off the same starting use case provided in the next chapter. For each topic that relates to code, we will add code that shows how that topic impacts the RAG pipeline, giving you an in-depth understanding about how your coding choices can impact the capabilities of your RAG-based application. You'll also explore optimization strategies for data retrieval and enhancing the interpretability of AI-generated content. These insights are pivotal for improving the usability and effectiveness of AI applications, ensuring they are more aligned with strategic business objectives and user needs. As you progress, you'll gain a deeper understanding of how RAG can revolutionize AI applications, making them more intelligent, responsive, and tailored to specific requirements. The potential of RAG to facilitate personalized, efficient, and insightful AI solutions is thoroughly examined, bridging the theoretical and practical divides. Throughout this learning experience, a spirit of exploration and experimentation is encouraged, aiming to unlock the full potential of data through RAG, fostering innovation, and advancing the domain of AI-driven solutions. By the end, you will have gained comprehensive knowledge and practical skills in RAG, equipping you to contribute to the evolution of AI technologies and applications in your business and beyond. Understanding RAG: Basics and Principles#\\nModern day large language models (LLM) are impressive, but they have never seen your company’s private data (hopefully!). This means the ability of an LLM to help your company fully utilize its own data is very limited. 
This very large barrier has given rise to the concept of Retrieval Augmented Generation (RAG), where you are using the power and capabilities of the LLM, but combining it with the knowledge and data contained within your company’s internal data repositories. This is the primary motivation for using RAG, to make new data available to the LLM and significantly increase the value you can extract from that data. Beyond internal data, it is also useful in cases where the LLM has not been trained on the data, even if it is public, like the most recent research papers or articles about a topic that is strategic to your company. In both cases, we are talking about data that was not present during the training of the LLM. You can have the latest LLM trained on the most tokens ever, exceeding 10 trillion, but if that data was not present for the training, then the LLM will be at a disadvantage to help you reach your full productivity. Ultimately, this highlights the fact that for most organizations, connecting to data an LLM is not yet familiar with is a central need for them to fully utilize that LLM. RAG is the most popular paradigm for doing this. This book focuses on showing you how to set up a RAG application with your data, as well as how to get the most out of it in various situations. I intend to give you an in-depth understanding of RAG and its importance in leveraging LLM within the context of a company's private or specific data needs. Advantages of RAG#\\nPotential advantages of using RAG include improved accuracy and relevance, customization, flexibility, and expanding the model’s knowledge beyond the training data. Here is each advantage more thoroughly explored:\\n\\nImproved Accuracy and Relevance: RAG can significantly enhance the accuracy and relevance of responses that are generated by large language models (LLMs). 
RAG fetches and incorporates specific information from a database or dataset, typically in real time, and ensures that the output is based on both the model’s pre-existing knowledge and the most current and relevant data that you are providing directly. Customization and Flexibility: RAG can customize its responses based on your domain specific needs. By integrating a company's internal databases into the model's response generation process, RAG allows for outputs that are tailored to the unique context and requirements of the business. This level of customization is invaluable for creating personalized experiences and for applications requiring a high degree of specificity and detail. Expanding Model Knowledge Beyond Training Data: LLMs are limited by the scope of their training data. RAG overcomes this limitation by enabling models to access and utilize information that was not included in their initial training sets. This effectively expands the knowledge base of the model without the need for retraining, making LLMs more versatile and adaptable to new domains or rapidly evolving topics. Limitations of RAG#\\nBut there are some limitations to using RAG, which include dependency on the quality of the internal data, computational overhead, more complex integrations, and the potential for information overload. Dependency on Data Quality: \\nWhen talking about how data can impact an AI model, the saying in data science circles is “garbage in, garbage out.”  Meaning, if you give a model bad data, it will give you bad results. RAG is no different. The effectiveness of RAG is directly tied to the quality of the data it retrieves. If the underlying database or dataset contains outdated, biased, or inaccurate information, the outputs generated by RAG will likely suffer from the same issues. Need for Data Manipulation and Cleaning:\\nData in the recesses of the company often has a lot of value to it, but it is not often in good, accessible shape. 
For example, data from PDF-based customer statements needs a lot of massaging to get into a format that can be useful to a RAG pipeline. Computational Overhead: \\nA RAG pipeline introduces a host of new computational steps into the response generation process, including data retrieval, processing, and integration. LLMs are getting faster every day, but even the fastest response can be more than a second, and some can take several seconds. If you combine that with other data processing steps, and possibly multiple LLM calls, the result can be a very significant increase in the time it takes to receive a response. This all leads to increased computational overhead, affecting the efficiency and scalability of the entire system. As with any other IT initiative, an organization must balance the benefits of enhanced accuracy and customization against the resource requirements and potential latency introduced by these additional processes. Data Storage Explosion: Complexity in Integration and Maintenance: \\nTraditionally, your data resides in a data source which is queried in various ways to be made available to your internal and external systems. But with RAG, your data resides in multiple forms and locations, such as vectors in a vector database, that represent the same data, but in a different format. Add in the complexity of connecting these various data sources to LLMs and relevant technical mechanisms like vector searches, and you have a significant increase in complexity. This increased complexity can be resource-intensive.\"),\n",
       "  Document(metadata={'source': 'https://kbourne.github.io/chapter1.html'}, page_content=\"Vectors, Vectors, Vectors! A vector is a mathematical representation of your data. They are often referred to as the embeddings when talking specifically about natural language processing and LLMs. Vectors are one of the most important concepts to understand and there are many different parts of a RAG pipeline that utilize vectors. I felt it was bigger than just a quick definition, so I go into much more depth in the much larger next section dedicated to vectors. And beyond that, we literally spend two chapters (6 & 7) going over vectors and how they are used to find similar content. Vectors#\\nIt could be argued that understanding vectors and all the ways they are used in RAG is the most important part of this entire book. As mentioned above, vectors are simply the mathematical representations of your external data, and they are often referred to as embeddings. These representations capture semantic information in a format that can be processed by algorithms, facilitating tasks such as similarity search, which is a crucial step in the RAG process. Vectors typically have a specific dimension based on how many numbers are represented by them. For example, this is a 4 dimensional vector: [0.123, 0.321, 0.312, 0.231]\\n\\nIf you didn’t know we were talking about vectors and you saw this in Python code, you might recognize this as a list of 4 floating points, and you aren’t too far off. Typically though, when working with vectors in Python, you actually want to recognize them as a Numpy Array. Numpy Arrays are generally more machine learning friendly because they are optimized to be processed much faster and efficiently than python lists, and they are more broadly recognized as the defacto representation of embeddings across machine learning packages like SciPy, Pandas, Scikit-Learn, TensorFlow, Keras, Pytorch, and many others. 
Numpy also enables you to perform vectorized math directly on the Numpy Array, such as performing element-wise operations, without having to code in loops and other approaches you might have to use if using a different type of sequence. When working with vectors for vectorization, they are often hundreds, or thousands of dimensions, which refers to the number of floating points present in the vector. So a 1024 dimension vector literally has 1024 floating points in a Numpy Array. Higher dimensionality can capture more detailed semantic information, which is crucial for accurately matching query inputs with relevant documents or data in RAG applications. In chapter 7, we cover the key role vectors and vector databases play In RAG implementation. And then in chapter 8, we will dive more into the concept of similarity searches, which utilize vectors to conduct the search much faster and efficiently. These are key concepts that will help you gain a much deeper understanding into how to better implement a RAG pipeline. Implementing RAG in AI Applications#\\nRetrieval Augmented Generation (RAG) is rapidly becoming a cornerstone of GenAI platforms in the corporate world. RAG combines the power of information retrieval of internal or “new” data with generative language models to enhance the quality and relevance of generated text. This technique can be particularly useful for companies across various industries to improve their products, services, and operational efficiencies. Some examples of how RAG can be used include:\\n Customer Support and Chatbots - These can exist without RAG, but when integrated with RAG, it can connect those chatbots with past customer interactions, FAQs, support documents, and anything else that was specific to that customer. Automated Reporting - RAG can assist in creating initial drafts or summarizing existing articles, research papers, and other types of unstructured data into more digestible formats. 
Product Descriptions - For e-commerce companies, RAG can be used to help generate or enhance product descriptions by retrieving information from similar products or manufacturer specifications. Searchability and Utility of Internal Knowledge Bases - RAG can improve access to internal knowledge bases. This can be achieved through the generation of summaries of documents or by providing direct answers to queries based on the content of internal documents, emails, and other resources. Searchability and Utility of General Knowledge Bases - In areas like legal and compliance, where companies need to have an understanding of a massive and growing general knowledge base, RAG can be implemented to retrieve and summarize relevant laws, regulations, and compliance documents. Other areas where this is applicable include research and development, medical, academia, patents, and technical documents. Innovation Scouting - Similar to searching general knowledge bases, but with a focus on new innovation, companies can use RAG to scan and summarize information from quality sources to identify trends and potential areas for new innovations that are relevant to that company's specialization. Content Personalization - RAG can be used by media and content platforms to personalize content recommendations or create customized summaries by retrieving information based on a user's past interactions and preferences. Product Recommendations - RAG can be used by e-commerce sites to enhance product recommendation engines, generate personalized descriptions, or highlight features based on the browsing and purchasing history of customers. Training and Education - RAG can be used by education organizations and corporate training programs to generate or customize learning materials based on specific needs and knowledge levels of the learners. 
With RAG, a much deeper level of internal knowledge from the organization can be incorporated into the educational curriculum in very customized ways to the individual or role. This book will help you understand how you can implement all of these game-changing initiatives in your company. Comparing RAG with Conventional Generative AI#\\nConventional Generative AI has already shown to be a revolutionary change for companies, helping their employees reach new levels of productivity. LLMs like ChatGPT are assisting users with a rapidly growing list of applications that include writing business plans, writing and improving code, writing marketing copy, and even providing healthier recipes for a specific type of diet. Ultimately, much of what users are doing is getting done faster.\")],\n",
       " 'question': 'What are the advantages of using RAG?',\n",
       " 'answer': \"The advantages of using Retrieval-Augmented Generation (RAG) include:\\n\\n1. **Improved Accuracy and Relevance**: RAG enhances the accuracy and relevance of responses generated by large language models (LLMs) by incorporating specific, real-time information from databases or datasets, ensuring outputs are based on both the model's pre-existing knowledge and the most current data.\\n\\n2. **Customization and Flexibility**: RAG allows for tailored responses based on domain-specific needs by integrating a company's internal databases into the response generation process, creating personalized experiences and outputs that meet unique business requirements.\\n\\n3. **Expanding Model Knowledge Beyond Training Data**: RAG enables models to access and utilize information that was not included in their initial training sets, effectively expanding the model's knowledge base without the need for retraining, making LLMs more versatile and adaptable to new domains or rapidly evolving topics.\"}"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Question - run the chain\n",
    "result = rag_chain_with_source.invoke(user_query)\n",
    "result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a742a6a8-d1ce-49ca-9493-ce2fc40e0bff",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
